setwd("~/Downloads/M A S T E R S /Sem-3/Part-1/Visualization/labs/lab-5")
data = read.csv2("Oilcoal.csv",header = TRUE)
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
data %>% plot_ly(x = ~Oil, y = ~Coal, type = 'scatter',size = ~Marker.size,color=~Country, mode="markers", frame= ~Year)
Additionally the above 2 countries are the only ones with a marker size of 1 so this kind of trend can be due to their high population.
While the trend of the rest of the countries hover around a certain range for the oil and coal consumption.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
data_1<-data %>%filter(Country =="US" | Country == "Japan")
plot_ly(data_1, x=~Oil, y=~Coal,type = 'scatter',size = ~Marker.size,color=~Country, mode="markers", frame =~Year)%>%animation_opts(
100, easing = "cubic", redraw = F
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
We see a drop in oil consumption during 1980-1984, which was due to the 1979 oil crisis which was cause due to the Iranian Revolution. This lead to the rise in oil prices, hence a reduction in consumption.
And due to this factor, there was a push towards coal to reduce reliance on oil and we can see that in the plot that after 1984, the coal consumption rose.
We also see a drop in the consumption of oil between 2008-2009 for both countries which could be due to the financial crisis.
data_2 <- data%>% mutate(Oil_prop = (Oil/(Oil + Coal)) * 100)
data_3 <- data%>% mutate(Oil_prop = 0)
data_4 <- rbind(data_2,data_3)
data_4%>% plot_ly(x=~Country, y=~Oil_prop, frame =~Year,type = 'bar')%>%animation_opts(
100, easing = "cubic", redraw = F
) %>% layout(showlegend = FALSE)
The advantage of the animated bar plot is that this plots helps to see easily compare different countries on the same scale while the bubble chart was a bit more difficult to compare due to the scale difference for the bigger countries i.e USA and China. The length of bars allows for easier visual quantification compared to the area of bubbles. Additionally, we are able to easily see whether countries are consuming more oil or coal as the year pass by which is harder to easily notice in the bubble chart.
The disadvantage of the animated bar plot is that we cannot incorporate more dimensions which we can for a bubble chart. Also, we are only able to see the change over time across one-axis. Additionally, the plot makes it harder to find outliers and can get cluttered as we add more countries making it harder to analyze.
data_2 <- data%>% mutate(Oil_prop = (Oil/(Oil + Coal)) * 100)
data_3 <- data%>% mutate(Oil_prop = 0)
data_4 <- rbind(data_2,data_3)
data_4%>% plot_ly(x=~Country, y=~Oil_prop, frame =~Year,type = 'bar')%>%animation_opts(
100, easing = "elastic", redraw = F
) %>% layout(showlegend = FALSE)
Advantage: With the help of elastic easing we are also able to see the speed of change of the oil proportion between each year which helps us see if the oil production sharply increases or decreases in relation to coal between 2 time points. And this fast transition helps draw immediate attention to the user.
Disadvantage: This fast transition can make it difficult to the users making it harder to grasp subtle changes.It can also be overwhelming to the user to see such fast transition. It can also distract the user from the actual insights from the data and be too focused on the animation itself.
library(tidyr)
reshaped_data <- data %>%
select(Country, Year, Coal) %>%
spread(key = Country, value = Coal) %>%
arrange(Year)
rownames(reshaped_data) <- reshaped_data$Year
mat <- as.matrix(reshaped_data[, -1])
library(tourr)
mat <- rescale(mat)
set.seed(12345)
tour<- new_tour(mat, guided_tour(cmass), NULL)
steps <- c(0, rep(1/15, 200))
Projs<-lapply(steps, function(step_size){
step <- tour(step_size)
if(is.null(step)) {
.GlobalEnv$tour<- new_tour(mat, guided_tour(cmass), NULL)
step <- tour(step_size)
}
step
}
)
## Value 0.786 18.3% better (0.781 away) - NEW BASIS
## Value 0.882 13.0% better (0.550 away) - NEW BASIS
## Value 0.894 1.4% better (0.223 away) - NEW BASIS
## Value 0.903 1.0% better (0.246 away) - NEW BASIS
## Value 0.951 5.4% better (0.446 away) - NEW BASIS
## Value 0.969 1.8% better (0.264 away) - NEW BASIS
## Value 0.979 1.1% better (0.147 away) - NEW BASIS
## Value 0.981 0.2% better (0.049 away) - NEW BASIS
## Value 0.981 0.1% better (0.044 away)
## Value 0.982 0.1% better (0.045 away) - NEW BASIS
## Value 0.982 0.1% better (0.062 away)
## Value 0.983 0.2% better (0.076 away) - NEW BASIS
## Value 0.983 0.1% better (0.069 away)
## Value 0.983 0.1% better (0.066 away)
## Value 0.983 0.1% better (0.042 away)
## Value 0.983 0.1% better (0.057 away)
## Value 0.983 0.1% better (0.055 away)
## Value 0.983 0.1% better (0.054 away)
## Value 0.984 0.2% better (0.086 away) - NEW BASIS
## Value 0.984 0.0% better (0.024 away)
## Value 0.984 0.1% better (0.064 away)
## Value 0.984 0.1% better (0.080 away)
## Value 0.984 0.0% better (0.017 away)
## Value 0.984 0.0% better (0.017 away)
## Value 0.985 0.1% better (0.111 away) - NEW BASIS
## Value 0.985 0.0% better (0.022 away)
## Value 0.986 0.1% better (0.067 away) - NEW BASIS
## Value 0.986 0.0% better (0.025 away)
## Value 0.986 0.0% better (0.024 away)
## Value 0.986 0.0% better (0.072 away)
## Value 0.986 0.0% better (0.011 away)
## Value 0.986 0.0% better (0.022 away)
## Value 0.986 0.0% better (0.008 away)
## Value 0.986 0.1% better (0.077 away)
## Value 0.986 0.0% better (0.019 away)
## Value 0.986 0.0% better (0.014 away)
## Value 0.986 0.0% better (0.028 away)
## Value 0.986 0.0% better (0.047 away)
## Value 0.986 0.0% better (0.026 away)
## Value 0.986 0.0% better (0.040 away)
## Value 0.986 0.0% better (0.042 away)
## Value 0.987 0.1% better (0.199 away)
## Value 0.987 0.1% better (0.111 away)
## Value 0.986 0.0% better (0.069 away)
## Value 0.986 0.1% better (0.073 away)
## Value 0.986 0.0% better (0.021 away)
## Value 0.986 0.1% better (0.083 away)
## Value 0.986 0.0% better (0.033 away)
## Value 0.986 0.0% better (0.016 away)
## Value 0.986 0.0% better (0.026 away)
## Value 0.986 0.0% better (0.016 away)
## No better bases found after 25 tries. Giving up.
## Final projection:
## 0.675 0.091
## -0.098 0.746
## -0.059 0.211
## -0.084 -0.222
## -0.173 -0.488
## -0.693 0.118
## 0.116 0.077
## 0.020 -0.290
## Value 0.841 26.6% better (0.781 away) - NEW BASIS
## Value 0.931 11.1% better (0.661 away) - NEW BASIS
## Value 0.964 3.5% better (0.444 away) - NEW BASIS
## Value 0.971 0.8% better (0.111 away) - NEW BASIS
## Value 0.975 0.4% better (0.130 away) - NEW BASIS
## Value 0.977 0.2% better (0.073 away) - NEW BASIS
## Value 0.982 0.6% better (0.143 away) - NEW BASIS
## Value 0.984 0.3% better (0.086 away) - NEW BASIS
## Value 0.986 0.2% better (0.064 away) - NEW BASIS
## Value 0.986 0.0% better (0.035 away)
## Value 0.987 0.1% better (0.085 away)
## Value 0.986 0.0% better (0.019 away)
## Value 0.986 0.0% better (0.019 away)
## Value 0.986 0.1% better (0.050 away)
## Value 0.986 0.1% better (0.035 away)
## Value 0.986 0.1% better (0.042 away)
## Value 0.986 0.0% better (0.034 away)
## Value 0.987 0.1% better (0.061 away)
## Value 0.986 0.0% better (0.019 away)
## Value 0.986 0.0% better (0.036 away)
## Value 0.986 0.1% better (0.048 away)
## Value 0.986 0.0% better (0.026 away)
## Value 0.986 0.1% better (0.047 away)
## Value 0.986 0.0% better (0.039 away)
## Value 0.988 0.2% better (0.140 away) - NEW BASIS
## Value 0.988 0.0% better (0.028 away)
## Value 0.988 0.1% better (0.056 away)
## Value 0.988 0.1% better (0.047 away)
## Value 0.987 0.0% better (0.016 away)
## Value 0.988 0.0% better (0.029 away)
## Value 0.987 0.0% better (0.015 away)
## Value 0.989 0.2% better (0.248 away) - NEW BASIS
## Value 0.990 0.1% better (0.038 away)
## Value 0.989 0.0% better (0.038 away)
## Value 0.989 0.0% better (0.034 away)
## Value 0.989 0.0% better (0.065 away)
## Value 0.989 0.0% better (0.030 away)
## Value 0.989 0.0% better (0.042 away)
## Value 0.989 0.0% better (0.034 away)
## Value 0.990 0.1% better (0.077 away) - NEW BASIS
## Value 0.990 0.0% better (0.034 away)
## Value 0.990 0.0% better (0.026 away)
## Value 0.990 0.0% better (0.018 away)
## Value 0.990 0.0% better (0.034 away)
## Value 0.990 0.0% better (0.034 away)
## Value 0.990 0.0% better (0.012 away)
## Value 0.990 0.0% better (0.063 away)
## Value 0.990 0.0% better (0.010 away)
## Value 0.990 0.0% better (0.009 away)
## Value 0.990 0.0% better (0.023 away)
## Value 0.990 0.0% better (0.030 away)
## Value 0.990 0.1% better (0.087 away)
## Value 0.990 0.0% better (0.014 away)
## Value 0.990 0.0% better (0.012 away)
## Value 0.990 0.0% better (0.020 away)
## Value 0.990 0.0% better (0.052 away)
## Value 0.990 0.0% better (0.013 away)
## Value 0.990 0.0% better (0.020 away)
## Value 0.990 0.0% better (0.025 away)
## Value 0.990 0.0% better (0.017 away)
## Value 0.990 0.0% better (0.025 away)
## Value 0.990 0.0% better (0.018 away)
## Value 0.990 0.0% better (0.023 away)
## Value 0.990 0.0% better (0.029 away)
## No better bases found after 25 tries. Giving up.
## Final projection:
## 0.720 0.068
## -0.248 0.526
## 0.157 0.613
## 0.145 -0.459
## 0.016 -0.182
## -0.182 -0.308
## -0.375 -0.048
## -0.448 0.044
## Value 0.829 24.8% better (0.781 away) - NEW BASIS
## Value 0.896 8.6% better (0.464 away) - NEW BASIS
## Value 0.941 5.0% better (0.374 away) - NEW BASIS
## Value 0.961 2.2% better (0.497 away) - NEW BASIS
## Value 0.966 0.4% better (0.085 away) - NEW BASIS
## Value 0.973 0.9% better (0.178 away) - NEW BASIS
## Value 0.978 0.6% better (0.123 away) - NEW BASIS
## Value 0.979 0.1% better (0.041 away) - NEW BASIS
## Value 0.980 0.3% better (0.122 away) - NEW BASIS
## Value 0.983 0.3% better (0.154 away) - NEW BASIS
## Value 0.986 0.3% better (0.100 away) - NEW BASIS
tour_dat <- function(i) {
step <- Projs[[i]]
proj <- center(mat %*% step$proj)
data.frame(x = proj[,1], y = proj[,2], state = rownames(mat))
}
# projection of each variable's axis
proj_dat <- function(i) {
step <- Projs[[i]]
data.frame(
x = step$proj[,1], y = step$proj[,2], variable = colnames(mat)
)
}
stepz <- cumsum(steps)
# tidy version of tour data
tour_dats <- lapply(1:length(steps), tour_dat)
tour_datz <- Map(function(x, y) cbind(x, step = y), tour_dats, stepz)
tour_dat <- dplyr::bind_rows(tour_datz)
# tidy version of tour projection data
proj_dats <- lapply(1:length(steps), proj_dat)
proj_datz <- Map(function(x, y) cbind(x, step = y), proj_dats, stepz)
proj_dat <- dplyr::bind_rows(proj_datz)
ax <- list(
title = "", showticklabels = FALSE,
zeroline = FALSE, showgrid = FALSE,
range = c(-1.1, 1.1)
)
# for nicely formatted slider labels
options(digits = 3)
tour_dat <- highlight_key(tour_dat, ~state, group = "A")
tour <- proj_dat %>%
plot_ly(x = ~x, y = ~y, frame = ~step, color = I("black")) %>%
add_segments(xend = 0, yend = 0, color = I("gray80")) %>%
add_text(text = ~variable) %>%
add_markers(data = tour_dat, text = ~state, ids = ~state, hoverinfo = "text") %>%
layout(xaxis = ax, yaxis = ax, showlegend = FALSE)#%>%animation_opts(frame=0, transition=0, redraw = F)
tour
a <- data %>%
select(Country, Year, Coal) %>%
spread(key = Country, value = Coal) %>%
arrange(Year)
a <- a %>% select(Year, Brazil)
ggplot(a, aes(Year, Brazil)) + geom_path(lineend = "butt",
linejoin = "round", linemitre = 1)
Yes, we are able to find 2 clusters and they do correspond to different year ranges. One cluster is from 1965 to mid 1980s and the other cluster is from min 1980s to 2009.
From the plot, we can see that Brazil has the biggest contribution to the projection. Based on the time-series plot for Brazil, we can see 2 distinct trends, one before mid 1980s and one after mid 1980s and this supports the clusters we found suggesting that impact that this variable has on creating the clusters.